In [2]:
# Load spaCy's large English pipeline. Unlike the small model
# ('en_core_web_sm'), the large model ships real 300-dimensional
# word vectors, which the similarity examples below rely on.
import spacy
nlp = spacy.load('en_core_web_lg')
In [3]:
nlp(u'lion').vector
Out[3]:
array([ 1.8963e-01, -4.0309e-01, 3.5350e-01, -4.7907e-01, -4.3311e-01,
2.3857e-01, 2.6962e-01, 6.4332e-02, 3.0767e-01, 1.3712e+00,
-3.7582e-01, -2.2713e-01, -3.5657e-01, -2.5355e-01, 1.7543e-02,
3.3962e-01, 7.4723e-02, 5.1226e-01, -3.9759e-01, 5.1333e-03,
-3.0929e-01, 4.8911e-02, -1.8610e-01, -4.1702e-01, -8.1639e-01,
-1.6908e-01, -2.6246e-01, -1.5983e-02, 1.2479e-01, -3.7276e-02,
-5.7125e-01, -1.6296e-01, 1.2376e-01, -5.5464e-02, 1.3244e-01,
2.7519e-02, 1.2592e-01, -3.2722e-01, -4.9165e-01, -3.5559e-01,
-3.0630e-01, 6.1185e-02, -1.6932e-01, -6.2405e-02, 6.5763e-01,
-2.7925e-01, -3.0450e-03, -2.2400e-02, -2.8015e-01, -2.1975e-01,
-4.3188e-01, 3.9864e-02, -2.2102e-01, -4.2693e-02, 5.2748e-02,
2.8726e-01, 1.2315e-01, -2.8662e-02, 7.8294e-02, 4.6754e-01,
-2.4589e-01, -1.1064e-01, 7.2250e-02, -9.4980e-02, -2.7548e-01,
-5.4097e-01, 1.2823e-01, -8.2408e-02, 3.1035e-01, -6.3394e-02,
-7.3755e-01, -5.4992e-01, 9.9999e-02, -2.0758e-01, -3.9674e-02,
2.0664e-01, -9.7557e-02, -3.7092e-01, 2.7901e-01, -6.2218e-01,
-1.0280e-01, 2.3271e-01, 4.3838e-01, 3.2445e-02, -2.9866e-01,
-7.3611e-02, 7.1594e-01, 1.4241e-01, 2.7770e-01, -3.9892e-01,
3.6656e-02, 1.5759e-01, 8.2014e-02, -5.7343e-01, 3.5457e-01,
2.2491e-01, -6.2699e-01, -8.8106e-02, 2.4361e-01, 3.8533e-01,
-1.4083e-01, 1.7691e-01, 7.0897e-02, 1.7951e-01, -4.5907e-01,
-8.2120e-01, -2.6631e-02, 6.2549e-02, 4.2415e-01, -8.9630e-02,
-2.4654e-01, 1.4156e-01, 4.0187e-01, -4.1232e-01, 8.4516e-02,
-1.0626e-01, 7.3145e-01, 1.9217e-01, 1.4240e-01, 2.8511e-01,
-2.9454e-01, -2.1948e-01, 9.0460e-01, -1.9098e-01, -1.0340e+00,
-1.5754e-01, -1.1964e-01, 4.9888e-01, -1.0624e+00, -3.2820e-01,
-1.1232e-02, -7.9482e-01, 3.7275e-01, -6.8710e-03, -2.5772e-01,
-4.7005e-01, -4.1387e-01, -6.4089e-02, -2.8033e-01, -4.0778e-02,
-2.4866e+00, 6.2494e-03, -1.0210e-02, 1.2752e-01, 3.4965e-01,
-1.2571e-01, 3.1570e-01, 4.1926e-01, 2.0056e-01, -5.5984e-01,
-2.2801e-01, 1.2012e-01, -2.0518e-03, -8.9764e-02, -8.0373e-02,
1.1969e-02, -2.6978e-01, 3.4829e-01, 7.3664e-03, -1.1137e-01,
6.3410e-01, 3.8449e-01, -6.2248e-01, 4.1145e-02, 2.5922e-01,
6.5811e-01, -4.9548e-01, -1.3030e-01, -3.8279e-01, 1.1156e-01,
-4.3085e-01, 3.4473e-01, 2.7109e-02, -2.5108e-01, -2.8011e-01,
2.1662e-01, 3.2660e-01, 5.5895e-02, 7.6077e-02, -5.2480e-02,
4.5928e-02, -2.5266e-01, 5.2845e-01, -1.3145e-01, -1.2453e-01,
4.0556e-01, 3.1877e-01, 2.4415e-02, -2.2620e-01, -6.1960e-01,
-4.0886e-01, -3.5534e-02, -5.5123e-03, 2.3438e-01, 8.7854e-01,
-2.5161e-01, 4.0600e-01, -4.4284e-01, 3.4934e-01, -5.6429e-01,
-2.3676e-01, 6.2199e-01, -2.8175e-01, 4.2024e-01, 1.0043e-01,
-1.4720e-01, 4.9593e-01, -3.5850e-01, -1.3998e-01, -2.7494e-01,
2.3827e-01, 5.7268e-01, 7.9025e-02, 1.7872e-02, -2.1829e-01,
5.5050e-02, -5.4200e-01, 1.6788e-01, 3.9065e-01, 3.0209e-01,
2.3040e-01, -3.9351e-02, -2.1078e-01, -2.7224e-01, 1.6907e-01,
5.4819e-01, 9.4888e-02, 7.9798e-01, -6.6158e-02, 1.9844e-01,
2.0307e-01, 4.4808e-02, -1.0240e-01, -6.9909e-02, -3.6756e-02,
9.5159e-02, -2.7830e-01, -1.0597e-01, -1.6276e-01, -1.8211e-01,
-3.1897e-01, -2.1633e-01, 1.4994e-01, -7.2057e-02, 2.2264e-01,
-4.5551e-01, 3.0341e-01, 1.8431e-01, 2.1681e-01, -3.1940e-01,
2.6426e-01, 5.8106e-01, 5.4635e-02, 6.3238e-01, 4.3169e-01,
9.0343e-02, 1.9494e-01, 3.5483e-01, -2.0706e-02, -7.3117e-01,
1.2941e-01, 1.7418e-01, -1.5065e-01, 5.3355e-02, 4.4794e-02,
-1.6600e-01, 2.2007e-01, -5.3970e-01, -2.4968e-01, -2.6464e-01,
-5.5515e-01, 5.8242e-01, 2.2295e-01, 2.4433e-01, 4.5275e-01,
3.4693e-01, 1.2255e-01, -3.9059e-02, -3.2749e-01, -2.7891e-01,
1.3766e-01, 3.8392e-01, 1.0543e-03, -1.0242e-02, 4.9205e-01,
-1.7922e-01, 4.1215e-02, 1.3547e-01, -2.0598e-01, -2.3194e-01,
-7.7701e-01, -3.8237e-01, -7.6383e-01, 1.9418e-01, -1.5441e-01,
8.9740e-01, 3.0626e-01, 4.0376e-01, 2.1738e-01, -3.8050e-01],
dtype=float32)
In [4]:
len(nlp(u'lion').vector)
Out[4]:
300
In [6]:
### Vector for a whole sentence: a Doc also exposes .vector
# (per the spaCy docs, Doc.vector is the average of its token
# vectors, so it has the same 300-d shape as a single word).
sents = nlp(u'The quick brown fox jumped over a lazy dog')
sents.vector
Out[6]:
array([-2.29145795e-01, -3.46055627e-03, -5.60528897e-02, 7.85977766e-02,
9.12922155e-03, 1.10382795e-01, -1.26714230e-01, -7.90637732e-02,
1.31968096e-01, 1.86862671e+00, -2.59036660e-01, -9.45902020e-02,
-1.29910111e-01, -1.72616780e-01, -2.08401456e-01, 1.87006649e-02,
8.01126659e-02, 9.91218865e-01, 2.73393337e-02, -2.94356763e-01,
-1.45484447e-01, -9.41558629e-02, -5.07920086e-02, -1.69811353e-01,
1.41727030e-01, -8.95571038e-02, -1.84179127e-01, -1.76457226e-01,
1.65122882e-01, -2.20417902e-01, -1.91546515e-01, 2.51313895e-01,
6.73556626e-02, -5.30913323e-02, 1.28895223e-01, -5.74297756e-02,
7.14288801e-02, -1.10088736e-01, -8.49754438e-02, -1.26965329e-01,
2.06004441e-01, 7.04980046e-02, -6.15093037e-02, -1.66130662e-01,
1.48633450e-01, 4.71172146e-02, -1.81588233e-01, 5.00197560e-02,
1.43082336e-01, 2.85028890e-02, -2.06958458e-01, 2.00484216e-01,
5.18219313e-04, 2.13606693e-02, -8.42414424e-02, 6.82437792e-02,
3.52840014e-02, 1.79329336e-01, -9.48552191e-02, -8.99764448e-02,
6.93147480e-02, 1.17364325e-01, 2.81073321e-02, 9.52875614e-02,
1.40404105e-01, -1.61580786e-01, 1.07955532e-02, 4.05195542e-02,
5.06632291e-02, 6.59189969e-02, 1.82070043e-02, 8.42595622e-02,
6.82728887e-02, 3.44687775e-02, 7.74068907e-02, -1.00541331e-01,
-7.22397119e-02, -1.36631444e-01, 7.82399997e-03, 6.45366637e-03,
-3.64300050e-02, 1.83236331e-01, -4.48207781e-02, -5.04070148e-02,
1.28827030e-02, -2.76747774e-02, 4.84310776e-01, 2.93981403e-01,
2.97116339e-01, 8.06249902e-02, -7.29233325e-02, 1.58352450e-01,
3.50650884e-02, -1.50348455e-01, -5.20066619e-02, 5.10889962e-02,
-1.08953886e-01, -3.26871127e-02, 1.82315543e-01, -1.76915884e-01,
7.51234517e-02, 3.48288864e-02, -1.47218555e-01, -5.25834449e-02,
8.49288851e-02, -7.30551064e-01, 2.56704897e-01, -7.00142235e-02,
5.10252193e-02, -8.52812231e-02, 6.54618889e-02, -1.08713120e-01,
1.79649666e-01, -1.94106445e-01, 3.34541090e-02, 1.53969109e-01,
-1.62373334e-02, 2.23454423e-02, 1.26356453e-01, 8.19555596e-02,
7.33288899e-02, -1.39666215e-01, -5.76875471e-02, -1.54199898e-01,
-1.81703672e-01, 8.44638869e-02, -1.19984429e-02, -9.62789953e-02,
-8.59419480e-02, 1.57964393e-03, -9.74553302e-02, -4.44773361e-02,
-1.70016795e-01, 1.30081773e-01, 1.18517242e-01, -1.47740498e-01,
-1.51207790e-01, -2.60798894e-02, 7.20583089e-03, -5.91398887e-02,
-2.04430008e+00, -4.88525555e-02, 1.42633557e-01, 1.18846670e-01,
1.86597764e-01, -1.98611796e-01, 3.69429886e-02, -2.92323399e-02,
1.74508438e-01, 2.44701132e-02, 7.42663443e-02, -3.51392217e-02,
-1.06225409e-04, -4.92834412e-02, -2.45728999e-01, 5.33285514e-02,
-1.16909109e-01, 1.55293569e-01, 4.15556654e-02, -2.11775109e-01,
-6.41415492e-02, -1.29796550e-01, -1.74706772e-01, -1.03955440e-01,
-6.71750009e-02, -9.75381285e-02, -5.64488955e-02, -5.04329987e-02,
-1.09255046e-01, -1.09185778e-01, 1.22292109e-01, -6.37411792e-03,
-1.37798786e-01, -1.17282547e-01, -2.98780531e-01, 4.50878032e-03,
1.51448250e-01, 4.59450036e-02, -3.90139967e-02, 1.08529776e-01,
-3.31053324e-02, -2.43077219e-01, -2.40157679e-01, -2.18712226e-01,
-9.24932957e-03, 1.09542698e-01, -1.14373304e-02, 5.49983308e-02,
5.12482189e-02, -1.29175857e-01, 5.50087690e-02, 2.40794420e-02,
-2.66746879e-02, -4.12909985e-02, 9.04988647e-02, 1.63671792e-01,
1.63387001e-01, 3.43656763e-02, 4.32012156e-02, -7.35577792e-02,
-1.93504453e-01, 5.87084442e-02, -2.94104666e-01, 2.24302337e-02,
1.98929116e-01, 1.08914085e-01, 5.39495535e-02, -1.01477228e-01,
6.67209327e-02, -1.59832574e-02, -4.15321141e-02, 1.00349993e-01,
-8.82177874e-02, 1.12334676e-01, 9.95865688e-02, 1.47073328e-01,
-2.19971225e-01, 1.68930113e-01, -9.82039869e-02, 1.46480769e-01,
7.54283294e-02, 7.47743398e-02, -4.67192121e-02, -2.89261602e-02,
-1.68541744e-01, -2.04066336e-01, 6.15419932e-02, 1.78478420e-01,
6.53144531e-03, 2.50683784e-01, -1.80575773e-01, 7.89033324e-02,
1.53888576e-02, -2.56483369e-02, 7.45135620e-02, -8.18474293e-02,
-1.20121110e-02, -7.43027106e-02, 9.36226696e-02, 1.48760885e-01,
-1.36678651e-01, -1.26202106e-01, -2.14737803e-02, 1.77118890e-02,
-2.12611686e-02, -7.99791217e-02, -7.95553401e-02, -9.02217776e-02,
-2.11785555e-01, 1.05856664e-01, -3.49383317e-02, -1.37948439e-01,
-3.86459231e-02, 1.60381228e-01, 1.18056178e-01, 1.20334335e-01,
1.32341668e-01, 1.83661059e-02, -4.36116830e-02, -1.62338875e-02,
1.18117318e-01, 1.23674564e-01, 1.21527009e-01, -1.65413886e-01,
2.28357241e-01, -1.64023101e-01, 1.39535554e-02, -1.18618160e-01,
-8.61809924e-02, 4.60525565e-02, 6.05534203e-03, 1.59306556e-01,
-4.25856374e-02, -1.23108894e-01, -3.38825025e-02, -3.24287675e-02,
4.69944486e-03, 4.22910005e-02, 5.81285506e-02, 3.38969007e-02,
1.62819222e-01, 9.96906757e-02, -8.49305466e-02, 1.44411981e-01,
6.18494488e-02, -5.18973358e-02, 6.54534400e-02, -4.85357717e-02,
5.73443016e-04, 1.92629937e-02, -1.51506230e-01, 5.55032529e-02,
-2.19598915e-02, -1.37180611e-01, -3.97436693e-02, 1.23321891e-01,
7.16196671e-02, 5.71780056e-02, -1.28691018e-01, 3.99866141e-03],
dtype=float32)
In [7]:
# Pairwise cosine similarity for every ordered pair of tokens in the Doc.
# NOTE: `token` is kept as the name (despite holding a Doc) because a
# later cell unpacks it (`a,b,c = token`).
token = nlp(u'lion cat pets')
for t1 in token:
    for t2 in token:
        print(f'{t1} {t2} {t1.similarity(t2)}')
lion lion 1.0
lion cat 0.5265437
lion pets 0.2913302
cat lion 0.5265437
cat cat 1.0
cat pets 0.64571816
pets lion 0.2913302
pets cat 0.64571816
pets pets 1.0
In [10]:
### display as markdown
a,b,c = token
from IPython.display import Markdown, display
display(Markdown(f'<table><tr><th></th><th>{a.text}</th><th>{b.text}</th><th>{c.text}</th></tr>\
<tr><td>**{a.text}**</td><td>{a.similarity(a):{.4}}</td><td>{b.similarity(a):{.4}}</td><td>{c.similarity(a):{.4}}</td></tr>\
<tr><td>**{b.text}**</td><td>{a.similarity(b):{.4}}</td><td>{b.similarity(b):{.4}}</td><td>{c.similarity(b):{.4}}</td></tr>\
<tr><td>**{c.text}**</td><td>{a.similarity(c):{.4}}</td><td>{b.similarity(c):{.4}}</td><td>{c.similarity(c):{.4}}</td></tr>'))
lion cat pets **lion** 1.0 0.5265 0.2913 **cat** 0.5265 1.0 0.6457 **pets** 0.2913 0.6457 1.0
In [19]:
nlp(u'lion').similarity(nlp(u'tiger'))
Out[19]:
0.7359829457249657
In [ ]:
Content source: rishuatgithub/MLPy
Similar notebooks: